From 2273e65e11dd0234f2f51ebaef61fc6e848d4059 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Micha=C5=82=20G=C3=B3rny?= <mgorny@gentoo.org>
Date: Thu, 10 Sep 2020 13:35:39 +0200
Subject: [PATCH 02/24] bpo-39503: CVE-2020-8492: Fix AbstractBasicAuthHandler
 (GH-18284) (GH-19304)

The AbstractBasicAuthHandler class of the urllib.request module uses
an inefficient regular expression which can be exploited by an
attacker to cause a denial of service. Fix the regex to prevent the
catastrophic backtracking. Vulnerability reported by Ben Caller
and Matt Schwager.

AbstractBasicAuthHandler of urllib.request now parses all
WWW-Authenticate HTTP headers and accepts multiple challenges per
header: use the realm of the first Basic challenge.

Co-Authored-By: Serhiy Storchaka <storchaka@gmail.com>
(cherry picked from commit 0b297d4ff1c0e4480ad33acae793fbaf4bf015b4)

[rebased for py2.7]
---
 Lib/test/test_urllib2.py | 81 ++++++++++++++++++++++++++--------------
 Lib/urllib2.py           | 60 +++++++++++++++++++++++------
 2 files changed, 101 insertions(+), 40 deletions(-)

diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py
index 20a0f58143..0adbb13c43 100644
--- a/Lib/test/test_urllib2.py
+++ b/Lib/test/test_urllib2.py
@@ -1128,42 +1128,67 @@ class HandlerTests(unittest.TestCase):
         self.assertEqual(req.get_host(), "proxy.example.com:3128")
         self.assertEqual(req.get_header("Proxy-authorization"),"FooBar")
 
-    def test_basic_auth(self, quote_char='"'):
+    def check_basic_auth(self, headers, realm):
         opener = OpenerDirector()
         password_manager = MockPasswordManager()
         auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
-        realm = "ACME Widget Store"
-        http_handler = MockHTTPHandler(
-            401, 'WWW-Authenticate: Basic realm=%s%s%s\r\n\r\n' %
-            (quote_char, realm, quote_char) )
+        body = '\r\n'.join(headers) + '\r\n\r\n'  # header lines + blank line
+        http_handler = MockHTTPHandler(401, body)
         opener.add_handler(auth_handler)
         opener.add_handler(http_handler)
         self._test_basic_auth(opener, auth_handler, "Authorization",
                               realm, http_handler, password_manager,
                               "http://acme.example.com/protected",
-                              "http://acme.example.com/protected"
-                             )
-
-    def test_basic_auth_with_single_quoted_realm(self):
-        self.test_basic_auth(quote_char="'")
-
-    def test_basic_auth_with_unquoted_realm(self):
-        opener = OpenerDirector()
-        password_manager = MockPasswordManager()
-        auth_handler = urllib2.HTTPBasicAuthHandler(password_manager)
-        realm = "ACME Widget Store"
-        http_handler = MockHTTPHandler(
-            401, 'WWW-Authenticate: Basic realm=%s\r\n\r\n' % realm)
-        opener.add_handler(auth_handler)
-        opener.add_handler(http_handler)
-        msg = "Basic Auth Realm was unquoted"
-        with test_support.check_warnings((msg, UserWarning)):
-            self._test_basic_auth(opener, auth_handler, "Authorization",
-                                  realm, http_handler, password_manager,
-                                  "http://acme.example.com/protected",
-                                  "http://acme.example.com/protected"
-                                 )
-
+                              "http://acme.example.com/protected")
+
+    def test_basic_auth(self):
+        realm = "realm@example.com"    # the first Basic realm: expected
+        realm2 = "realm2@example.com"  # decoy realm: must not be chosen
+        basic = 'Basic realm="{realm}"'.format(realm=realm)
+        basic2 = 'Basic realm="{realm2}"'.format(realm2=realm2)
+        other_no_realm = 'Otherscheme xxx'
+        digest = ('Digest realm="{realm2}", '
+                  'qop="auth, auth-int", '
+                  'nonce="dcd98b7102dd2f0e8b11d0f600bfb0c093", '
+                  'opaque="5ccc069c403ebaf9f0171e9517f40e41"'
+                  .format(realm2=realm2))
+        for realm_str in (
+            # test "quote" and 'quote'
+            'Basic realm="{realm}"'.format(realm=realm),
+            "Basic realm='{realm}'".format(realm=realm),
+
+            # charset is ignored
+            'Basic realm="{realm}", charset="UTF-8"'.format(realm=realm),
+
+            # Multiple challenges per header
+            ', '.join((basic, basic2)),
+            ', '.join((basic, other_no_realm)),
+            ', '.join((other_no_realm, basic)),
+            ', '.join((basic, digest)),
+            ', '.join((digest, basic)),
+        ):
+            headers = ['WWW-Authenticate: {realm_str}'
+                       .format(realm_str=realm_str)]
+            self.check_basic_auth(headers, realm)
+
+        # no quote: expect a warning
+        with test_support.check_warnings(("Basic Auth Realm was unquoted",
+                                     UserWarning)):
+            headers = ['WWW-Authenticate: Basic realm={realm}'
+                       .format(realm=realm)]
+            self.check_basic_auth(headers, realm)
+
+        # Multiple headers: one challenge per header.
+        # Use the first Basic realm.
+        for challenges in (
+            [basic,  basic2],
+            [basic,  digest],
+            [digest, basic],
+        ):
+            headers = ['WWW-Authenticate: {challenge}'
+                       .format(challenge=challenge)
+                       for challenge in challenges]
+            self.check_basic_auth(headers, realm)
 
     def test_proxy_basic_auth(self):
         opener = OpenerDirector()
diff --git a/Lib/urllib2.py b/Lib/urllib2.py
index 8b634ada37..b2d1fad6f2 100644
--- a/Lib/urllib2.py
+++ b/Lib/urllib2.py
@@ -856,8 +856,15 @@ class AbstractBasicAuthHandler:
 
     # allow for double- and single-quoted realm values
     # (single quotes are a violation of the RFC, but appear in the wild)
-    rx = re.compile('(?:.*,)*[ \t]*([^ \t]+)[ \t]+'
-                    'realm=(["\']?)([^"\']*)\\2', re.I)
+    rx = re.compile('(?:^|,)'   # start of the string or ','
+                    '[ \t]*'    # optional whitespace
+                    '([^ \t]+)' # scheme like "Basic"
+                    '[ \t]+'    # mandatory whitespace
+                    # realm=xxx
+                    # realm='xxx'
+                    # realm="xxx"
+                    'realm=(["\']?)([^"\']*)\\2',  # \2: same closing quote
+                    re.I)
 
     # XXX could pre-emptively send auth info already accepted (RFC 2617,
     # end of section 2, and section 1.2 immediately after "credentials"
@@ -869,23 +876,52 @@ class AbstractBasicAuthHandler:
         self.passwd = password_mgr
         self.add_password = self.passwd.add_password
 
+    def _parse_realm(self, header):
+        # Generator: yield (scheme, realm) for each challenge in the header
+        found_challenge = False
+        for mo in AbstractBasicAuthHandler.rx.finditer(header):
+            scheme, quote, realm = mo.groups()
+            if quote not in ['"', "'"]:
+                warnings.warn("Basic Auth Realm was unquoted",
+                              UserWarning, 3)
+
+            yield (scheme, realm)
+
+            found_challenge = True
+
+        if not found_challenge:  # the regex matched nothing in this header
+            if header:
+                scheme = header.split()[0]  # first word of the header
+            else:
+                scheme = ''
+            yield (scheme, None)  # None: no realm could be parsed
 
     def http_error_auth_reqed(self, authreq, host, req, headers):
         # host may be an authority (without userinfo) or a URL with an
         # authority
-        # XXX could be multiple headers
-        authreq = headers.get(authreq, None)
+        headers = headers.getheaders(authreq)
+        if not headers:
+            # no header found
+            return
 
-        if authreq:
-            mo = AbstractBasicAuthHandler.rx.search(authreq)
-            if mo:
-                scheme, quote, realm = mo.groups()
-                if quote not in ['"', "'"]:
-                    warnings.warn("Basic Auth Realm was unquoted",
-                                  UserWarning, 2)
-                if scheme.lower() == 'basic':
+        unsupported = None
+        for header in headers:
+            for scheme, realm in self._parse_realm(header):
+                if scheme.lower() != 'basic':
+                    unsupported = scheme  # remembered for the error below
+                    continue
+
+                if realm is not None:
+                    # Use the first matching Basic challenge.
+                    # Ignore following challenges even if they use the Basic
+                    # scheme.
                     return self.retry_http_basic_auth(host, req, realm)
 
+        if unsupported is not None:
+            raise ValueError("AbstractBasicAuthHandler does not "
+                             "support the following scheme: %r"
+                             % (unsupported,))
+
     def retry_http_basic_auth(self, host, req, realm):
         user, pw = self.passwd.find_user_password(realm, host)
         if pw is not None:
-- 
2.30.1

